package com.tl.spark.java;

import com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.xml.sax.XMLReader;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.util.List;

/**
 * Small dom4j demo: parses a patent XML document and prints the
 * {@code base:WIPOST3Code} text of every {@code base:DocumentID} found under
 * the publication reference.
 *
 * @program: spark-test
 * @description: dom4j XPath lookup example plus a helper for building a SAX XMLReader
 * @author: dong.tl
 * @create: 2018-09-19 16:11
 **/
public class Dom4jParseXML {

    /** Input used when no path is passed on the command line. */
    private static final String DEFAULT_XML_PATH =
            "C:\\Users\\Administrator\\Desktop\\CN101986000000192CN00000861001920AFULZH19870722CN00P.XML";

    /** Absolute XPath selecting the DocumentID elements of the publication reference. */
    private static final String DOCUMENT_ID_XPATH =
            "/business:PatentDocumentAndRelated/business:BibliographicData/business:PublicationReference/base:DocumentID";

    /** XPath, relative to a DocumentID element, selecting the text of its WIPOST3Code child. */
    private static final String WIPO_ST3_CODE_TEXT_XPATH = "base:WIPOST3Code/text()";

    /**
     * Name of the JDK-bundled SAX parser factory. It is referenced by name so
     * that this class has no compile-time dependency on a JDK-internal package
     * (such packages are not accessible to application code on Java 9+).
     */
    private static final String JDK_SAX_PARSER_FACTORY =
            "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl";

    /**
     * Parses the XML document and prints the number of matching DocumentID
     * elements followed by the WIPOST3Code text of each one.
     *
     * @param args optional; {@code args[0]} is the document to read. When it
     *             is absent the built-in default path is used.
     */
    public static void main(String[] args) {
        String xmlPath = args != null && args.length > 0 ? args[0] : DEFAULT_XML_PATH;
        try {
            SAXReader reader = new SAXReader();
            Document doc = reader.read(xmlPath);

            List<Node> documentIds = doc.selectNodes(DOCUMENT_ID_XPATH);
            System.out.println(documentIds.size());

            for (Node documentId : documentIds) {
                List<Node> codeTexts = documentId.selectNodes(WIPO_ST3_CODE_TEXT_XPATH);
                if (codeTexts.isEmpty()) {
                    // A DocumentID without a WIPOST3Code must not abort the remaining ones.
                    System.err.println("DocumentID without base:WIPOST3Code text: " + documentId.getPath());
                    continue;
                }
                System.out.println(codeTexts.get(0).getStringValue());
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line demo: report the failure and finish.
            e.printStackTrace();
        }
    }

    /**
     * Creates an {@link XMLReader} backed by the JDK-bundled SAX parser.
     *
     * <p>The factory is looked up through the public JAXP API by class name
     * instead of being instantiated directly, so the JDK's own parser is still
     * selected but no JDK-internal type is needed to compile this method.
     *
     * @param validating     whether the parser should validate documents
     * @param namespaceAware whether the parser should be namespace aware
     * @return the XMLReader of a newly created SAX parser
     * @throws Exception if the parser cannot be configured or created
     */
    public static XMLReader createXMLReader(boolean validating,
                                            boolean namespaceAware) throws Exception {
        // A null class loader makes JAXP use the current thread's context class loader.
        SAXParserFactory factory = SAXParserFactory.newInstance(JDK_SAX_PARSER_FACTORY, null);
        factory.setValidating(validating);
        factory.setNamespaceAware(namespaceAware);
        SAXParser parser = factory.newSAXParser();
        return parser.getXMLReader();
    }
}
